Useful things R

Published

Tue, 4 of June, 2024

Modified

Tue, 4 of June, 2024

library(dplyr)
starwars <- starwars

Filter rows in and out with dplyr

Multiple values (same column) in and out

  • %in% c("x", "y") for values
  • !colname for negation
f_1 <- starwars %>% 
    filter(homeworld %in% c("Naboo","Tatooine"))  

f_2 <- starwars %>% 
    # ! in front! 
    filter(!homeworld %in% c("Naboo","Tatooine"))

Multiple columns (some values) in and out

f_3 <- starwars %>% 
    filter(species == "Ewok" & homeworld %in% c("Endor","Tatooine"))  

f_4 <- starwars %>% 
    # ! in front! 
    filter(!species %in% c("Droid","Human") &  !homeworld %in% c("Naboo","Tatooine"))

Keep/Drop rows that have ALL NA values in all columns

# Filter rows that have only NA values in all columns  
cols_allNA <- starwars %>%
  filter(if_all(everything(), is.na))  # 0 

# or drop them 
# Drop rows that have NA values in all columns
cols_noneNA <- starwars %>%
  filter(!if_all(everything(), is.na))  # 87 

Drop rows that have ANY NA values in all columns

# Filter rows that have only NA values in all columns  
cols_anyNA <- starwars %>%
  filter(!if_any(everything(), is.na))  # 58 

Keep/drop rows that have ALL NA values in some columns

# Filter rows that have only NA values in all columns whose name ....
spec_col_allNA <- starwars %>%
 filter(if_all(starts_with("h"), is.na))  # 0 
 # filter(!if_all(starts_with("h"), is.na))  # 0 

DESELECT only numeric columns that only have 0 for any row

# first create one 
starwars2 <- starwars %>% 
    mutate(all0_col  = 0) %>% 
    relocate (all0_col, .before = everything()) %>% 
    ## ----- drop SE hanno tutta la colonna == 0
    select(!where( ~ is.numeric(.x) && sum(.x, na.rm = TRUE) == 0))
    ## ----- or 
    # select(-where( ~ is.numeric(.x) && sum(.x, na.rm = TRUE) == 0))
d <- data.frame(a = c(1,5,56,4,9), 
                     b = c(0,0,NA,0,NA), 
                     c = c(98,67,NA,3,7), 
                     d = c(0,0,0,0,0), 
                     e = c(NA,NA,NA,NA,NA))

# --- removes all NA 
d2 <- d %>% 
    select(!where(~all(is.na(.x))))

# --- removes NA's and Zeros
d3a <- d2 %>% 
    # all() and any() check if all or any values in a vector evaluate to TRUE for some expression
    select(where( ~ !all(is.na(.) | . == 0)))  

# d3b <- d2 %>% 
#   # --- or  
#   select(where( ~ !all_of(is.na(.) | .x == 0))) 

d3c <- d2 %>% 
    # --- or  
    select(!where( ~ is.numeric(.x) && sum(.x, na.rm = TRUE) == 0))

# # --- removes strictly ALL Zeros
# d4 <- d2 %>% 
#   select(!where( ~ contains ("0") ))
#  

Left join dove elimino x e y

  • using suffix argument of left_join
star <- starwars %>% 
    select( 2:5)

star2 <- starwars %>% 
    select( 2, 5:7) 

# join 
star_join <- star %>% 
    left_join(star2,
                 by = c("name"), 
                 # get rid of duplicated cols .... 
                 suffix = c("", ".y")) %>% 
    select(-ends_with(".y"))